In [13]:
# Smoke test: a bare expression on the last line of a cell is echoed as the cell's output.
"Hello World!"
Out[13]:
In [14]:
import pandas as pd

# Read the gzip-compressed Linux blame log; the path is relative to the
# directory the notebook is executed from.
log = pd.read_csv(
    "../../../software-data/projects/linux/linux_blame_log.csv.gz"
)
# Preview the first rows to see which columns are available.
log.head()
Out[14]:
In [15]:
# Print a summary of the frame: column names, dtypes, non-null counts and memory usage.
log.info()
In [16]:
# Count how many rows of the blame log belong to each author and keep the
# ten most frequent ones (value_counts already sorts in descending order).
top10 = (
    log['author']
    .value_counts()
    .iloc[:10]
)
top10
Out[16]:
In [17]:
%matplotlib inline
ax = top10.plot.bar(title="Top 10 commiters (Linux kernel)")
ax.set_ylabel("number of last changed lines")
ax.set_xlabel("name of the committer");
In [18]:
# Convert the 'timestamp' column to pandas datetime values (in place on `log`)
# so that later cells can do date arithmetic on it.
log['timestamp'] = pd.to_datetime(log['timestamp'])
log.head()
Out[18]:
In [19]:
# Age of each row = distance between its timestamp and the newest timestamp in
# the data set. The reference point is the latest entry in the log, not the
# current wall-clock time, so the result does not change from run to run.
log['age'] = log['timestamp'].max() - log['timestamp']
log.head()
Out[19]:
In [20]:
# Derive a coarse "component" from each file path: keep at most the first two
# path segments and join them with ":" (e.g. "drivers/net/foo.c" -> "drivers:net";
# a path with a single segment stays as it is).
log['component'] = (
    log['path']
    .str.split("/")
    .str[:2]
    .str.join(":")
)
log.head()
Out[20]:
In [21]:
# For every component take the smallest age, i.e. the age of its most
# recently changed row.
age_per_component = log.groupby('component')['age'].agg('min')
age_per_component.head()
Out[21]:
In [22]:
# Bar chart of the components, ordered from smallest to largest minimum age.
# The trailing ';' suppresses the text repr of the returned axes object.
age_per_component.sort_values().plot(kind="bar", figsize=[15, 5]);
In [ ]:
import pandas as pd

# Fresh starting point for the cells below: re-read the blame log from disk and
# convert 'timestamp' to datetimes in one go. Rebinding `log` here discards the
# 'age' and 'component' columns that earlier cells added to the previous frame.
log = pd.read_csv(
    "../../../software-data/projects/linux/linux_blame_log.csv.gz"
).assign(timestamp=lambda frame: pd.to_datetime(frame['timestamp']))
log.head()
In [23]:
# One row per (path, author) pair holding
#   - 'timestamp': the earliest timestamp of that author in that file
#   - 'line':      the number of non-null 'line' entries for that pair
knowledge = (
    log
    .groupby(['path', 'author'])
    .agg({'timestamp': 'min', 'line': 'count'})
)
knowledge.head()
Out[23]:
In [24]:
# 'all': total line count of the file, repeated on every (path, author) row
# of that file so it can be used as a denominator.
knowledge['all'] = (
    knowledge
    .groupby('path')['line']
    .transform('sum')
)
# 'knowing': share of the file's lines that is attributed to the author.
knowledge['knowing'] = knowledge['line'].div(knowledge['all'])
knowledge.head()
Out[24]:
In [25]:
# Highest 'knowing' share reached by any author within each file, aligned
# row by row with `knowledge`. The aggregation is given by name ('max') instead
# of the Python builtin `max`: passing the builtin is deprecated in recent
# pandas releases and emits a FutureWarning.
max_knowledge_per_file = knowledge.groupby('path')['knowing'].transform('max')
# Keep only the author(s) holding that top share; on a tie, every tied author
# is kept, so a file can appear more than once.
knowledge_carriers = knowledge[knowledge['knowing'] == max_knowledge_per_file]
# Move index level 1 ('author') into a regular column; 'path' stays the index.
knowledge_carriers = knowledge_carriers.reset_index(level=1)
knowledge_carriers.head()
Out[25]:
In [26]:
# Hand the knowledge carriers to the `ausi` helper that builds the input for a
# zoomable circle-packing visualisation.
# NOTE(review): `ausi` is external to this notebook and the meaning of the
# positional arguments is not visible here. Presumably they name the columns
# used for colouring/labelling ('author'), the hierarchy ('path' split on '/'),
# the circle size ('all') and a weight ('knowing'), followed by the output
# name — confirm against the helper's signature before changing anything.
from ausi import d3
d3.create_json_for_zoomable_circle_packing(
# reset_index() turns the 'path' index back into a regular column.
knowledge_carriers.reset_index(),
'author',
'author',
'path',
'/',
'all',
'knowing',
'linux_circle_packing'
)